# Add a column that categorises each type of natural disaster as a primary
# (direct) effect of climate change, a secondary (indirect) effect of climate
# change, or not related to climate change.
# Disaster types classified as primary effects of climate change.
primary_effects <- c("Extreme temperature", "Flood", "Storm")
# Disaster types classified as secondary effects of climate change.
# Entries must match the values of `type` exactly (no leading or trailing
# spaces); a value that does not match falls through to "Not related".
secondary_effects <- c(
  "Glacial lake outburst flood",
  "Drought",
  "Wildfire",
  "Mass movement (wet)",
  "Mass movement (dry)"
)
# Create the new column. `type` is resolved inside the data being mutated, so
# the classification always lines up with the rows flowing through the pipe.
full_data <- updated_data %>%
  mutate(
    climate_change_effect = case_when(
      type %in% primary_effects ~ "Primary effect",
      type %in% secondary_effects ~ "Secondary effect",
      TRUE ~ "Not related"
    )
  )
full_data
## # A tibble: 9,505 × 16
## historic id classification group subgroup type subtype iso country
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 No 1999-9388… nat-cli-dro-d… Natu… Climato… Drou… Drought DJI Djibou…
## 2 No 1999-9388… nat-cli-dro-d… Natu… Climato… Drou… Drought SDN Sudan
## 3 No 1999-9388… nat-cli-dro-d… Natu… Climato… Drou… Drought SOM Somalia
## 4 No 2000-0002… nat-hyd-flo-r… Natu… Hydrolo… Flood Riveri… AGO Angola
## 5 No 2000-0003… nat-met-ext-c… Natu… Meteoro… Extr… Cold w… BGD Bangla…
## 6 No 2000-0008… nat-geo-vol-a… Natu… Geophys… Volc… Ash fa… GTM Guatem…
## 7 No 2000-0009… nat-met-sto-s… Natu… Meteoro… Storm Storm … IRN Iran (…
## 8 No 2000-0012… nat-hyd-flo-r… Natu… Hydrolo… Flood Riveri… MOZ Mozamb…
## 9 No 2000-0017… nat-cli-wil-l… Natu… Climato… Wild… Land f… ZAF South …
## 10 No 2000-0019… nat-hyd-flo-r… Natu… Hydrolo… Flood Riveri… BRA Brazil
## # ℹ 9,495 more rows
## # ℹ 7 more variables: subregion <chr>, region <chr>, location <chr>,
## # year <dbl>, month <dbl>, day <dbl>, climate_change_effect <chr>
The data set comes from the Centre for Research on the Epidemiology of Disasters (CRED). This organisation records every instance of a natural disaster since 1900 within the EM-DAT database. This comprehensive open-source database compiles data from various sources: UN agencies, government agencies, research centers, humanitarian organisations, reinsurance companies and world press agencies. For a full list of sources see the EM-DAT website. I chose to download all the information regarding natural disasters between the years 1922 and 2022, totaling 16388 disasters. After looking at the data it was clear that the historic record before 2000 was too sparse to stand up against the quality of data recording conducted by CRED since its inception in 2000. Rather than looking at changes over a century using the historic record, I have decided to focus on non-historic entries of natural disasters which have occurred since 2000. Natural disasters between the years 2000 and 2022 total 9505.
Has there been a change in the prevalence of natural disaster types since the year 2000? In particular, has there been an increase in natural disasters related to climate change, e.g. flooding?
What regions are most affected by natural disasters?
To answer my research questions I am interested in where and when different natural disasters occurred. The following variables are of potential interest in asking these questions:
historic - Was the natural disaster before 2000 (when EM-DAT started recording natural disasters in real time)
subgroup - The disaster subgroup, e.g., Hydrological or Geophysical
type - The specific type of disaster i.e., Drought or Earthquake
subtype - More detailed description of the natural disaster i.e., Flash Flood or Lightning
iso - 3-letter code referring to the Country.
country - Country where the disaster occurred and had an impact
subregion - Subregion where the disaster occurred
region - Region or continent where the disaster occurred
location - Geographical location name as specified in the sources, e.g., city name
year
month
day
climate_change_effect - whether the natural disaster is a primary effect of climate change, secondary effect of climate change or unrelated to climate change. Categorized based on EU report.
For further explanation of each variable, see the codebook provided by the EM-DAT database.
# Number of disasters in each subgroup
full_data %>%
  count(subgroup, name = "count")
## # A tibble: 6 × 2
## subgroup count
## <chr> <int>
## 1 Biological 910
## 2 Climatological 678
## 3 Extra-terrestrial 1
## 4 Geophysical 760
## 5 Hydrological 4275
## 6 Meteorological 2881
# Number of disasters for each type within each subgroup
full_data %>%
  group_by(subgroup, type) %>%
  summarise(count = n())
## `summarise()` has grouped output by 'subgroup'. You can override using the
## `.groups` argument.
## # A tibble: 14 × 3
## # Groups: subgroup [6]
## subgroup type count
## <chr> <chr> <int>
## 1 Biological Animal incident 1
## 2 Biological Epidemic 880
## 3 Biological Infestation 29
## 4 Climatological Drought 393
## 5 Climatological Glacial lake outburst flood 3
## 6 Climatological Wildfire 282
## 7 Extra-terrestrial Impact 1
## 8 Geophysical Earthquake 626
## 9 Geophysical Mass movement (dry) 13
## 10 Geophysical Volcanic activity 121
## 11 Hydrological Flood 3852
## 12 Hydrological Mass movement (wet) 423
## 13 Meteorological Extreme temperature 479
## 14 Meteorological Storm 2402
# Number of disasters in each subtype
full_data %>%
  count(subtype, name = "count")
## # A tibble: 45 × 2
## subtype count
## <chr> <int>
## 1 Animal incident 1
## 2 Ash fall 99
## 3 Avalanche (dry) 1
## 4 Avalanche (wet) 47
## 5 Bacterial disease 413
## 6 Blizzard/Winter storm 174
## 7 Coastal flood 41
## 8 Cold wave 219
## 9 Collision 1
## 10 Derecho 6
## # ℹ 35 more rows
# this variable may be too specific for the scope of my question
# Number of disasters in each region
full_data %>%
  count(region, name = "count")
## # A tibble: 5 × 2
## region count
## <chr> <int>
## 1 Africa 2032
## 2 Americas 2180
## 3 Asia 3703
## 4 Europe 1232
## 5 Oceania 358
# Number of disasters for each country within each region
# (grouped by `country`, not `subregion`)
full_data %>%
  group_by(region, country) %>%
  summarise(count = n())
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
## # A tibble: 218 × 3
## # Groups: region [5]
## region country count
## <chr> <chr> <int>
## 1 Africa Algeria 49
## 2 Africa Angola 63
## 3 Africa Benin 31
## 4 Africa Botswana 13
## 5 Africa Burkina Faso 36
## 6 Africa Burundi 53
## 7 Africa Cabo Verde 9
## 8 Africa Cameroon 36
## 9 Africa Canary Islands 6
## 10 Africa Central African Republic 36
## # ℹ 208 more rows
# Number of disasters for each region within each year
full_data %>%
  group_by(year, region) %>%
  summarise(count = n())
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## # A tibble: 115 × 3
## # Groups: year [23]
## year region count
## <dbl> <chr> <int>
## 1 2000 Africa 125
## 2 2000 Americas 101
## 3 2000 Asia 193
## 4 2000 Europe 94
## 5 2000 Oceania 12
## 6 2001 Africa 116
## 7 2001 Americas 95
## 8 2001 Asia 164
## 9 2001 Europe 52
## 10 2001 Oceania 19
## # ℹ 105 more rows
# Inspect the full data set
view(full_data)
# One row per subgroup / subtype / region / climate_change_effect combination,
# holding the number of disasters in that combination
graph_example <- full_data %>%
  group_by(subgroup, subtype, region, climate_change_effect) %>%
  summarise(count = n())
## `summarise()` has grouped output by 'subgroup', 'subtype', 'region'. You can
## override using the `.groups` argument.
view(graph_example)
# Disasters by region - simple.
# graph_example holds one row per subgroup / subtype / region /
# climate_change_effect combination, so the counts are summed per region
# before plotting. Drawing the raw rows with position = "dodge" and no
# grouping aesthetic stacks every bar of a region on top of the others, which
# only shows the largest single count rather than the regional total.
# No fixed y limit is set: a ylim(0, 1000) would cut off the regional totals,
# which run well above 1000.
graph_example %>%
  group_by(region) %>%
  summarise(count = sum(count)) %>%
  ggplot(aes(x = region, y = count)) +
  geom_col(fill = "dark green") +
  labs(
    title = "Number of natural disasters between 2000 - 2022",
    y = "Number of Disasters",
    x = "Region"
  ) +
  theme_classic()
# Disasters by subgroup.
# Counts are summed per subgroup first: graph_example has several rows per
# subgroup (one per subtype / region / climate_change_effect), and plotting
# them directly with position = "dodge" and no grouping aesthetic overlays the
# bars so only the largest single count is visible.
# No fixed y limit is set: a ylim(0, 1000) would cut off subgroup totals above
# 1000 (e.g. Hydrological).
graph_example %>%
  group_by(subgroup) %>%
  summarise(count = sum(count)) %>%
  ggplot(aes(x = subgroup, y = count)) +
  geom_col(fill = "dark green") +
  labs(
    title = "Number of natural disasters between 2000 - 2022",
    y = "Number of Disasters",
    x = "Types of Disasters"
  ) +
  theme_classic()
# Build up the graph - split each subgroup by region.
# Counts are summed per subgroup and region first, so that each dodged bar
# shows the total for that combination rather than overlaying one bar per
# subtype / climate_change_effect row.
# No fixed y limit is set, so bars whose total exceeds 1000 are not cut off.
graph_example %>%
  group_by(subgroup, region) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  ggplot(aes(x = subgroup, y = count, fill = region)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of natural disasters between 2000-2022",
    y = "Number of Disasters",
    x = "Types of Disasters"
  ) +
  theme_classic()
# Total number of natural disasters for each region within each year
year_totals <- updated_data %>%
  group_by(year, region) %>%
  summarise(count = n())
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Stacked bars: one bar per year, with one segment per region
ggplot(year_totals, aes(x = year, y = count, fill = region)) +
  geom_col() +
  theme_classic() +
  labs(
    title = "total number of natural disasters which have occured since 2000 split by region"
  )
# Total number of natural disasters per region, split by climate change effect.
# Counts are summed per region and climate_change_effect first, so that each
# dodged bar shows the total for that combination rather than overlaying one
# bar per subgroup / subtype row.
# No fixed y limit is set, so bars whose total exceeds 1000 are not cut off.
graph_example %>%
  group_by(region, climate_change_effect) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  ggplot(aes(x = region, y = count, fill = climate_change_effect)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of natural disasters between 2000-2022",
    y = "Number of Disasters",
    x = "Region",
    fill = "climate change effect"
  ) +
  theme_classic()
# Line graph charting changes in prevalence over time, split by the three
# climate change conditions, plus a line for the overall yearly total.
# x is the year, y is the prevalence (number of disasters).
overall_disaster <- full_data %>%
  count(year, name = "total_disasters")
ggplot(
  full_data,
  aes(x = year, y = after_stat(count), color = climate_change_effect)
) +
  geom_line(stat = "count") +
  geom_line(
    data = overall_disaster,
    aes(y = total_disasters, color = "Total Disasters")
  ) +
  theme_minimal() +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022",
    x = "Year",
    y = "Prevalence",
    color = "Climate Change Effect"
  )
# Extra exploration: yearly prevalence, one line per disaster type
ggplot(full_data, aes(x = year, y = after_stat(count), color = type)) +
  geom_line(stat = "count") +
  theme_minimal() +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022",
    x = "Year",
    y = "Prevalence",
    color = "type of disaster"
  )
# Same plot, one line per disaster subgroup
ggplot(full_data, aes(x = year, y = after_stat(count), color = subgroup)) +
  geom_line(stat = "count") +
  theme_minimal() +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022",
    x = "Year",
    y = "Prevalence",
    color = "category of disaster"
  )
# Total number of disasters for each region within each year
total_disasters <- full_data %>%
  group_by(year, region) %>%
  summarise(total_disasters = n())
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Yearly prevalence by climate change effect, one panel per region, with each
# region's overall yearly total drawn as an additional line
ggplot(
  full_data,
  aes(x = year, y = after_stat(count), color = climate_change_effect)
) +
  geom_line(stat = "count") +
  geom_line(
    data = total_disasters,
    aes(y = total_disasters, color = "Total Disasters")
  ) +
  scale_color_manual(values = c("dark green", "red", "blue", "grey")) +
  facet_wrap(vars(region), scales = "free_x", nrow = 3) +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022 broken down per region",
    x = "Year",
    y = "Prevalence",
    color = "Climate Change Effect",
    linetype = "Legend"
  )
# Interactive version of the per-region prevalence plot: the ggplot object is
# built first and then converted with ggplotly()
interactive_plot <- (
  ggplot(
    full_data,
    aes(x = year, y = after_stat(count), color = climate_change_effect)
  ) +
    geom_line(stat = "count") +
    geom_line(
      data = total_disasters,
      aes(y = total_disasters, color = "Total Disasters")
    ) +
    scale_color_manual(values = c("grey", "red", "orange", "black")) +
    facet_wrap(vars(region), scales = "free_x", nrow = 3) +
    theme_minimal() +
    labs(
      title = "Prevalence of Natural Disasters from 2000 - 2022",
      x = "Year",
      y = "Prevalence",
      color = "Climate Change Effect",
      linetype = "Legend"
    )
) %>%
  ggplotly()
# Display the interactive plot
interactive_plot
# Interactive per-region plot with one line per disaster subgroup, plus each
# region's overall yearly total
interactive_plot2 <- (
  ggplot(full_data, aes(x = year, y = after_stat(count), color = subgroup)) +
    geom_line(stat = "count") +
    geom_line(
      data = total_disasters,
      aes(y = total_disasters, color = "Total Disasters")
    ) +
    scale_color_manual(
      values = c("grey", "red", "orange", "black", "green", "pink", "purple")
    ) +
    facet_wrap(vars(region), scales = "free_x", nrow = 3) +
    theme_minimal() +
    labs(
      title = "Prevalence of Natural Disasters from 2000 - 2022",
      x = "Year",
      y = "Prevalence",
      color = "subgroup",
      linetype = "Legend"
    )
) %>%
  ggplotly()
interactive_plot2
# Interactive plot of yearly prevalence per subgroup across all regions, with
# the overall yearly total as an extra line.
# This plot is not faceted by region, so the "Total Disasters" layer needs one
# row per year. Feeding it the per-year-per-region totals would put several y
# values on each year and draw a sawtooth line instead of the yearly total.
interactive_plot3 <- ggplotly(
  ggplot(full_data, aes(x = year, y = after_stat(count), color = subgroup)) +
    geom_line(stat = "count") +
    geom_line(
      data = count(full_data, year, name = "total_disasters"),
      aes(y = total_disasters, color = "Total Disasters")
    ) +
    labs(x = "Year", y = "Prevalence", color = "subgroup") +
    ggtitle("Prevalence of Natural Disasters from 2000 - 2022") +
    theme_minimal() +
    scale_color_manual(
      values = c("grey", "red", "orange", "black", "green", "pink", "purple")
    )
)
interactive_plot3